'''
* This is the project for Brtc LlmOps Platform
* @Author Leon-liao <liaosiliang@alltman.com>
* @Description Study script: split a Python source file into chunks with RecursiveCharacterTextSplitter.from_language
* @File: 7_study_splitter_code.py
* @Time: 2025/8/27
* @All Rights Reserved By Brtc
'''
from langchain_community.document_loaders import  UnstructuredFileLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter, Language

# 1. Build the file loader and load the document list.
#    The target is a Python source file (not Markdown); the relative path is
#    resolved against the current working directory.
# NOTE(review): UnstructuredFileLoader appears to be marked deprecated in recent
# langchain_community releases - confirm against the installed version.
source_path = "./5_study_recursive_splitter.py"
loader = UnstructuredFileLoader(source_path)
documents = loader.load()

# 2. Build the splitter, configured for Python source via Language.PYTHON.
splitter_options = {
    "chunk_size": 500,
    "chunk_overlap": 50,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter.from_language(
    Language.PYTHON,
    **splitter_options,
)

# 3. Split the loaded documents into chunks.
chunks = text_splitter.split_documents(documents)

# 4. Print each chunk's length, metadata and content, framed by separator lines.
separator = "================================================="
for chunk in chunks:
    content = chunk.page_content
    print(separator)
    print(f"块大小:{len(content)}, 元数据:{chunk.metadata}")
    print(content)
    print(separator)